rm(list=ls(all=TRUE))
set.seed(12345)
# load in a function to create clustered standard errors for mlogit models
# initial code by Mahmood Ara: http://thetarzan.wordpress.com/2011/06/11/clustered-standard-errors-in-r/
# slightly modified for mlogit models by Justin Esarey on 3/3/2015
# and for mnlogit by JAGuerra
#root <- "S:/NetworkParish/HPC/"

#root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/" #root <- "C:/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
setwd(paste(root,"Results/ReStat/",sep=""))

codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
source(paste(codes,"parseFormulajg.R",sep=""))
AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu
PlaceboCoords <- 0

if( PlaceboCoords==1 ){
####Placebo Coords
 if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/Placebo/Coords/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/Placebo/Coords/KS/",sep="")}
 
  allindexexer <- c("all","civil","social") 
} else {
  ####Robust and original
  if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/Restat/KS/",sep="")}  
  allindexexer <- c("w50list","atleast1","w100list","within50All","nmovers","age1530","wcontinuous","agecohorts1530","agecohorts1545","interactnmovers","Father","Fatherocc","Sibling","Siblingocc")
} 

for(indexexer in allindexexer) {
  #indexexer <- "all"
  sink(paste(rootsave,"NAPPEstimationHetewfenewCluster",indexexer,".txt",sep="")) 
  options(max.print=1000000)
  gc()
  load(paste(rootsave,"PMLmtwfinalnew",indexexer,".RData",sep=""))
####
  
  xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc"
  wvar <- "sw"
  fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep="")))          
  if (indexexer=="interactnmovers") {
    fformula1 <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar, "+","stayerp",":",wvar,sep=""))) 
    ######################
  # To get gradient matrix (Empirical Estimating Functions) from mnlogit object
  #The estimating function (or score function) for a model 
  #is the derivative of the objective function with respect 
  #to the parameter vector. The empirical estimating functions 
  #is the evaluation of the estimating function at the observed data 
  #(n observations) and the estimated parameters (of dimension k)
  #########################
  #for mnlogit the below program should give the same result as coeftest(vt.mod, vcovCLvt)
    source(paste(codes,"estfunjgv2.R",sep=""))
  } else { 
    if (indexexer=="Fatherocc") fformula1 <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"+","factor(father_cat)","|", wvar ,sep="")))  
    if (indexexer=="Siblingocc") fformula1 <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"+",paste("shsib",c(1:5),collapse= "+",sep=""),"|", wvar ,sep="")))
    source(paste(codes,"estfunjg.R",sep=""))
  }
  estfunjgmn <- estfunjg(ml.m.w,fformula1)
  vcovmn <- vcov(ml.m.w)
  Lmn <- length(unique(ml.m.w$model$alt))
  Mmn <- length(unique(ml.m.w$model$IDSocial))
  Nmn <- length(ml.m.w$model$IDSocial)/Lmn
  Kmn <- length(coefficients(ml.m.w))
  #dfc <- (Mmn/(Mmn-1))*((Nmn-1)/(Nmn-Kmn))
  dfcmn <- (Mmn/(Mmn-1))
  require(lmtest)
  cluster <- ml.m.w$model$IDSocial[c(1:Nmn)]
  cluster <- as.numeric(cluster)
  ujmn  <- apply(estfunjgmn,2, function(x) tapply(x, cluster, sum)) #cluster at parish level, but given fe there is negative intracluster correlation
  breadmn <- vcovmn * length(residuals(ml.m.w))
  meatmn <-  (crossprod(ujmn)/Nmn)
  nmn <- NROW(estfunjgmn)
  vcovCLmn <- dfcmn*(1/nmn * (breadmn %*% meatmn %*% breadmn))
  print(coeftest(ml.m.w, vcovCLmn))
  #To test coefficients are equal to split-panel jackkinfe estimates
  #library("car")
  #linearHypothesis(ml.m.w, c('sw:0=0.7622','sw:1=2.38915','sw:2=5.09615', 'sw:3=4.90845', 'sw:4=3.8569', 'sw:5=2.199395'))
  
  sink()
}


###########################################
## WO fe per parish
###########################################
if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
#indexexer <- "w50list" #Father Fatherocc
for(indexexer in c("w50list","Father","Fatherocc","Sibling","Siblingocc")) {
  #to get IDSocial as cluster
  load(paste(rootsave,"PMLmtwfinalnew",indexexer,".RData",sep=""))
  cluster <- ml.m.w$model$IDSocial
  ###Heterogeneous
  sink(paste(rootsave,"NAPPEstimationHetewofenewCluster",indexexer,".txt",sep="")) 
  options(max.print=1000000)
  gc()
  
  load(paste(rootsave,"PMLmwfinalnew",indexexer,".RData",sep=""))
  ####
  
  xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc"
  wvar <- "sw"
  fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
  if (indexexer =="Fatherocc") fformula1 <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"+","factor(father_cat)","|", wvar ,sep="")))
  if (indexexer=="Siblingocc") fformula1 <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"+",paste("shsib",c(1:5),collapse= "+",sep=""),"|", wvar ,sep="")))
    ######################
  # To get gradient matrix (Empirical Estimating Functions) from mnlogit object
  #The estimating function (or score function) for a model 
  #is the derivative of the objective function with respect 
  #to the parameter vector. The empirical estimating functions 
  #is the evaluation of the estimating function at the observed data 
  #(n observations) and the estimated parameters (of dimension k)
  #########################
  #for mnlogit the below program should give the same result as coeftest(vt.mod, vcovCLvt)
  source(paste(codes,"estfunjg.R",sep=""))
  estfunjgmn <- estfunjg(ml.m.w,fformula1)
  vcovmn <- vcov(ml.m.w)
  Lmn <- length(unique(ml.m.w$model$alt))
  Mmn <- length(unique(cluster))
  Nmn <- length(cluster)/Lmn
  Kmn <- length(coefficients(ml.m.w))
  #dfc <- (Mmn/(Mmn-1))*((Nmn-1)/(Nmn-Kmn))
  dfcmn <- (Mmn/(Mmn-1))
  require(lmtest)
  cluster <- cluster[c(1:Nmn)]
  cluster <- as.numeric(cluster)
  ujmn  <- apply(estfunjgmn,2, function(x) tapply(x, cluster, sum)) #cluster at parish level, but given fe there is negative intracluster correlation
  breadmn <- vcovmn * length(residuals(ml.m.w))
  meatmn <-  (crossprod(ujmn)/Nmn)
  nmn <- NROW(estfunjgmn)
  vcovCLmn <- dfcmn*(1/nmn * (breadmn %*% meatmn %*% breadmn))
  print(coeftest(ml.m.w, vcovCLmn))
  sink()
}

###Homogeneous
sink(paste(rootsave,"NAPPEstimationHomowofenewCluster",indexexer,".txt",sep="")) 
options(max.print=1000000)
gc()

load(paste(rootsave,"PMLmwhomofinal.RData",sep=""))
####

xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc"
wvar <- "sw"
fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          

######################
# To get gradient matrix (Empirical Estimating Functions) from mnlogit object
#The estimating function (or score function) for a model 
#is the derivative of the objective function with respect 
#to the parameter vector. The empirical estimating functions 
#is the evaluation of the estimating function at the observed data 
#(n observations) and the estimated parameters (of dimension k)
#########################
#for mnlogit the below program should give the same result as coeftest(vt.mod, vcovCLvt)
source(paste(codes,"estfunjg.R",sep=""))
estfunjgmn <- estfunjg(ml.m.w,fformula1)
vcovmn <- vcov(ml.m.w)
Lmn <- length(unique(ml.m.w$model$alt))
Mmn <- length(unique(cluster))
Nmn <- length(cluster)/Lmn
Kmn <- length(coefficients(ml.m.w))
#dfc <- (Mmn/(Mmn-1))*((Nmn-1)/(Nmn-Kmn))
dfcmn <- (Mmn/(Mmn-1))
require(lmtest)
cluster <- cluster[c(1:Nmn)]
cluster <- as.numeric(cluster)
ujmn  <- apply(estfunjgmn,2, function(x) tapply(x, cluster, sum)) #cluster at parish level, but given fe there is negative intracluster correlation
breadmn <- vcovmn * length(residuals(ml.m.w))
meatmn <-  (crossprod(ujmn)/Nmn)
nmn <- NROW(estfunjgmn)
vcovCLmn <- dfcmn*(1/nmn * (breadmn %*% meatmn %*% breadmn))
print(coeftest(ml.m.w, vcovCLmn))
sink()

###########################################
## HETEROGENEOUS
## 1. Without endogenous effects
## 2. Without contextual effects
###########################################
#AM <- 0
rootsave <- paste(root,"Results/ReStat/",sep="")
#if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
indexexer <- "w50list"
#for(indexexer in c("w50list")) {
sink(paste(rootsave,"NAPPEstimationHetewfenewdeltagammaCluster",indexexer,".txt",sep="")) 
options(max.print=1000000)
gc()
#to get IDSocial as cluster
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc"
wvar <- "sw"
woexercises <- c("wodelta","wogamma")
for(woexer in woexercises){
  if(woexer == "wogamma"){
    load(paste(rootsave,"Mlmtwogammanew.RData",sep=""))
    fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),sep="")))            
  } else{
    load(paste(rootsave,"KS/","PMLmtwodeltafinalnew.RData",sep=""))
    fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+","factor(IDSocial)"),"|", wvar ,sep="")))            
  }
  print(paste("Additional exercises",woexer,sep=""))
  ######################
  # To get gradient matrix (Empirical Estimating Functions) from mnlogit object
  #The estimating function (or score function) for a model 
  #is the derivative of the objective function with respect 
  #to the parameter vector. The empirical estimating functions 
  #is the evaluation of the estimating function at the observed data 
  #(n observations) and the estimated parameters (of dimension k)
  #########################
  #for mnlogit the below program should give the same result as coeftest(vt.mod, vcovCLvt)
  source(paste(codes,"estfunjg.R",sep=""))
  estfunjgmn <- estfunjg(ml.m.w,fformula1)
  vcovmn <- vcov(ml.m.w)
  Lmn <- length(unique(ml.m.w$model$alt))
  Mmn <- length(unique(ml.m.w$model$IDSocial))
  Nmn <- length(ml.m.w$model$IDSocial)/Lmn
  Kmn <- length(coefficients(ml.m.w))
  #dfc <- (Mmn/(Mmn-1))*((Nmn-1)/(Nmn-Kmn))
  dfcmn <- (Mmn/(Mmn-1))
  require(lmtest)
  cluster <- ml.m.w$model$IDSocial[c(1:Nmn)]
  cluster <- as.numeric(cluster)
  ujmn  <- apply(estfunjgmn,2, function(x) tapply(x, cluster, sum)) #cluster at parish level, but given fe there is negative intracluster correlation
  breadmn <- vcovmn * length(residuals(ml.m.w))
  meatmn <-  (crossprod(ujmn)/Nmn)
  nmn <- NROW(estfunjgmn)
  vcovCLmn <- dfcmn*(1/nmn * (breadmn %*% meatmn %*% breadmn))
  print(coeftest(ml.m.w, vcovCLmn))
  sink()
}

###########################################
## HOMOGENEOUS
## 1. Without contextual effects
###########################################
#AM <- 0
rootsave <- paste(root,"Results/ReStat/",sep="")
#if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
indexexer <- "w50list"
#for(indexexer in c("w50list")) {
sink(paste(rootsave,"KS/NAPPEstimationHomowfenewdeltagammaCluster",indexexer,".txt",sep="")) 
options(max.print=1000000)
gc()
#to get IDSocial as cluster
load(paste(rootsave,"KS/PMLmtwfinalnew",indexexer,".RData",sep=""))
cluster <- ml.m.w$model$IDSocial
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc"
wvar <- "sw"
woexer <- c("wodelta")
load(paste(rootsave,"KS/","PMLmwhomofinalwodelta.RData",sep=""))
fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+","factor(IDCivil)"),"|", wvar ,sep="")))            
print(paste("Additional exercises",woexer,sep=""))
######################
# To get gradient matrix (Empirical Estimating Functions) from mnlogit object
#The estimating function (or score function) for a model 
#is the derivative of the objective function with respect 
#to the parameter vector. The empirical estimating functions 
#is the evaluation of the estimating function at the observed data 
#(n observations) and the estimated parameters (of dimension k)
#########################
#for mnlogit the below program should give the same result as coeftest(vt.mod, vcovCLvt)
source(paste(codes,"estfunjg.R",sep=""))
estfunjgmn <- estfunjg(ml.m.w,fformula1)
vcovmn <- vcov(ml.m.w)
Lmn <- length(unique(ml.m.w$model$alt))
Mmn <- length(unique(cluster))
Nmn <- length(cluster)/Lmn
Kmn <- length(coefficients(ml.m.w))
#dfc <- (Mmn/(Mmn-1))*((Nmn-1)/(Nmn-Kmn))
dfcmn <- (Mmn/(Mmn-1))
require(lmtest)
cluster <- cluster[c(1:Nmn)]
cluster <- as.numeric(cluster)
ujmn  <- apply(estfunjgmn,2, function(x) tapply(x, cluster, sum)) #cluster at parish level, but given fe there is negative intracluster correlation
breadmn <- vcovmn * length(residuals(ml.m.w))
meatmn <-  (crossprod(ujmn)/Nmn)
nmn <- NROW(estfunjgmn)
vcovCLmn <- dfcmn*(1/nmn * (breadmn %*% meatmn %*% breadmn))
print(coeftest(ml.m.w, vcovCLmn))
sink()
